import numpy as np
from sklearn.neighbors import KNeighborsClassifier as kNN


def dealData(content):
    """Parse tab-separated sample lines into a min-max normalised matrix.

    Each non-blank line must hold at least four tab-separated fields: three
    numeric features followed by a class label. A UTF-8 byte-order mark at
    the start of the first line is ignored. The caller's list is not
    modified.

    Args:
        content: Sequence of text lines (e.g. the result of ``readlines()``).

    Returns:
        A tuple ``(dataMat, labels)``. ``dataMat`` is a float array of shape
        ``(n, 3)`` with every column scaled to ``[0, 1]``; ``labels`` is the
        list of ``n`` label strings in input order. ``n`` is the number of
        non-blank lines. Empty input yields a ``(0, 3)`` array and ``[]``.

    Raises:
        ValueError: If a feature field cannot be converted to ``float``.
        IndexError: If a line has three features but no label field.
    """
    # Collect the split fields of every non-blank line. The BOM is stripped
    # from a local copy of the first line instead of writing into `content`.
    rows = []
    for lineIndex, rawLine in enumerate(content):
        if lineIndex == 0:
            rawLine = rawLine.lstrip('\ufeff')
        line = rawLine.strip()
        if not line:
            # Blank lines (typically a trailing newline) carry no sample.
            continue
        rows.append(line.split('\t'))

    numLine = len(rows)
    dataMat = np.zeros((numLine, 3))
    labels = []
    for index, stringArray in enumerate(rows):
        dataMat[index] = [float(element) for element in stringArray[0:3]]
        labels.append(stringArray[3])

    if numLine == 0:
        # min()/max() over zero rows would raise; there is nothing to scale.
        return dataMat, labels

    minVal = dataMat.min(0)
    valRange = dataMat.max(0) - minVal
    # A constant column has zero range; dividing by 1 instead maps it to 0
    # rather than producing NaN from 0/0.
    valRange[valRange == 0] = 1.0
    # Broadcasting applies the per-column shift and scale to every row.
    dataMat = (dataMat - minVal) / valRange
    return dataMat, labels


def datingPredict():
    """Train a kNN classifier on the dating data set and report test errors.

    Reads tab-separated samples from ``datingTestSet.txt`` in the current
    working directory and normalises them with ``dealData``. It fits a
    4-neighbour brute-force classifier on the leading rows (roughly the
    first 90%) and predicts the remaining rows one at a time. Each
    prediction is printed next to its true label, followed by the number of
    test samples and the number of misclassifications.
    """
    # Load the data; the context manager closes the file even on error.
    with open("datingTestSet.txt", 'r', encoding='utf-8') as fr:
        content = fr.readlines()

    dataMat, labels = dealData(content)
    # Build the kNN classifier.
    neigh = kNN(n_neighbors=4, algorithm="brute")

    # Fit the model on the leading rows; the final ~10% is held out.
    # The split is sized from the parsed matrix so it always matches the
    # number of samples actually available.
    lineNum = len(dataMat)
    tail = lineNum / 10
    trainTail = int(lineNum - tail) + 1
    neigh.fit(dataMat[:trainTail], labels[:trainTail])

    testMat, testLabels = dataMat[trainTail:], labels[trainTail:]
    error = 0
    for data, realLabel in zip(testMat, testLabels):
        # predict() expects a 2-D array, so present the sample as one row.
        result = neigh.predict(data.reshape(1, -1))
        # `result` is a one-element array; compare its single entry.
        if result[0] != realLabel:
            print("result!=realResult")
            error += 1
        print("预测数据：%s ------ 真实数据：%s" % (result, realLabel))
    print("总数：%d     错误数:%d" % (len(testMat), error))


# Run the evaluation only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    datingPredict()
